################################################################################
##
## Purpose: This script creates Figure 9
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
## Input Files:
##  - ./data/prepped/finalData.RData: Prepped data from 9_DATA_final_build.R
##  - ./data/prepped/hearings/topic_models_100.RData: Prepped data from 7_DATA_topic_model_prep.R
##
## Output Files:
##  - ./output/figures/MS_figure_9.pdf
##
##
## See associated log file for compute environment, package versions, 
##  and date of most recent run.
##
################################################################################
# Start from an empty workspace and report memory use before loading anything.
rm(list = ls())
gc()
# library() stops with an error as soon as a package is missing, whereas
# require() only returns FALSE (with a warning) and lets the script fail later
# with a less informative "could not find function" error.
library(tidyverse)
library(ggridges)
library(ggrepel)
# NOTE(review): no fixest function appears to be called in this script, so it
# is still loaded on a best-effort basis with require() -- confirm whether the
# dependency is needed at all.
require(fixest)

# Fixed seed so that any randomised step below is reproducible.
set.seed(123)

# Compute details
# Prints a short description of the machine and R session so that it is
# captured in the run's log output.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))
if(Sys.info()['sysname'] == 'Windows') {
  # Hardware details via the `wmic` command-line tool. The indexing below
  # ([-1] / [2]) presumably skips the header line of each wmic table.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov
  
  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if(Sys.info()['sysname'] == 'Linux') {
  # One row per running `rsession` process: %cpu, %mem, pid and command line.
  # Each output line is trimmed and split on runs of whitespace so that the
  # column padding added by `ps` does not shift the fields; the first line is
  # the header and is dropped.
  splitted <- strsplit(trimws(system("ps -C rsession -o %cpu,%mem,pid,cmd",
                                     intern = TRUE)),
                       "\\s+")
  df <- do.call(rbind, lapply(splitted[-1], 
                              function(x) data.frame(
                                cpu = as.numeric(x[1]),
                                mem = as.numeric(x[2]),
                                pid = as.numeric(x[3]),
                                cmd = paste(x[-c(1:3)], collapse = " "))))
  # Explicit print() so the table is shown even when the script is source()d.
  # `df` is NULL when no rsession process was found.
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# Explicit print() so the session details reach the log under source() too.
print(sessionInfo())



# Loading Data
# The two .RData files presumably supply `utterance_level`, `lda_model` and
# `doc_topic_distr`, which are used below without being created in this
# script -- TODO confirm which file holds which object.
load('./data/prepped/finalData.RData')
load('./data/prepped/hearings/topic_models_100.RData')

# Top 50 words per topic: one column per topic, one row per word rank.
# Every 'X' in the column names is replaced by 'topic_' (assumes the columns
# come out of data.frame() with default names X1, X2, ... -- TODO confirm),
# and `top_word` stores the row number, i.e. the rank of the word.
topWord <- as_tibble(
  mutate(
    rename_all(
      data.frame(lda_model$get_top_words(n = 50)),
      function(nm) gsub('X','topic_',nm)
    ),
    top_word = row_number()
  )
)


# Looking at which topics are interrupted the most
# For utterances with nchars > 0 and ind > mind whose speaker does not match
# "Yellen": the mean of every column matching 'topic_' by `interrupted`,
# reshaped to long form (one row per interrupted x column). Columns whose name
# contains 'lag' are then dropped, and each remaining topic gets a label
# `terms` of the form '<topic number>: <three top-ranked words>'.
toplot <- utterance_level %>%
  filter(nchars > 0,
         ind > mind,
         !grepl("Yellen",speaker)) %>%
  group_by(interrupted) %>%
  summarise_at(vars(matches('topic_')),mean,na.rm = TRUE) %>%
  gather(key,value,-interrupted) %>%
  filter(!grepl('lag',key)) %>%
  left_join(topWord %>%
              gather(key,term,-top_word) %>%
              group_by(key) %>%
              arrange(top_word) %>%
              slice(1:3) %>%
              summarise(terms = paste(term,collapse = ', ')) %>%
              ungroup() %>%
              mutate(terms = paste0(gsub('topic_','',key),': ',terms)),
            # `key` is the only column shared by both tables; naming it keeps
            # the join stable if columns are added upstream.
            by = 'key')

# Who uses these topics most?
# Speakers are bucketed into 'Yellen', 'Other Fed Chairs' (names matching
# Bernanke, Greenspan or Powell) and 'All Others'. For each bucket x topic the
# mean, standard deviation and count of the topic weight are computed over
# utterances with nchars > 0 and ind > mind, then labelled with the same
# '<topic number>: <three top-ranked words>' string.
toplot2 <- utterance_level %>%
  mutate(tmpSpk = ifelse(grepl('Yellen',speaker),'Yellen',
                         ifelse(grepl('Bernanke|Greenspan|Powell',speaker),'Other Fed Chairs','All Others'))) %>%
  filter(nchars > 0,
         ind > mind) %>%
  select(tmpSpk,fullInd,matches('topic_\\d+$')) %>%
  gather(topic,theta,-tmpSpk,-fullInd) %>%
  group_by(tmpSpk,topic) %>%
  summarise(m = mean(theta),
            sd = sd(theta),
            n=n()) %>%
  # summarise() only peels off the last grouping variable; drop the remaining
  # `tmpSpk` grouping so the result is a plain table.
  ungroup() %>%
  left_join(topWord %>%
              gather(topic,term,-top_word) %>%
              group_by(topic) %>%
              arrange(top_word) %>%
              slice(1:3) %>%
              summarise(terms = paste(term,collapse = ', ')) %>%
              ungroup() %>%
              mutate(terms = paste0(gsub('topic_','',topic),': ',terms)),
            # `topic` is the only column shared by both tables.
            by = 'topic')

# Put the interruption means side by side (columns `0` and `1`, assuming
# `interrupted` is coded 0/1 -- TODO confirm), take their difference, and
# attach the speaker-level summaries. Both tables share only the `terms`
# label, so it is named explicitly as the join key. The result overwrites
# `toplot2` and has one row per topic x speaker bucket.
toplot2 <- toplot %>%
  spread(interrupted,value) %>%
  mutate(diffInt = `1` - `0`) %>%
  left_join(toplot2,
            by = 'terms')

# Add one column per Fed-chair bucket ('Yellen', 'Other Fed Chairs') holding
# that bucket's mean topic weight `m`, matched back on the `terms` label
# (the only column the two tables share).
toplot3 <- toplot2 %>%
  left_join(toplot2 %>%
              filter(tmpSpk != 'All Others') %>%
              select(tmpSpk,terms,m) %>%
              spread(tmpSpk,m),
            by = 'terms')

# Final plotting table: attach the mean of `theta` per topic from
# `doc_topic_distr` (joined on `topic`, the only shared column) and compute
# the Yellen-minus-other-chairs difference in mean topic weight. Only
# topic-level columns are kept, so distinct() removes the duplicate rows left
# over from the per-speaker layout of `toplot3`.
toplot4 <- toplot3 %>%
  left_join(doc_topic_distr %>%
              mutate(topic = paste0('topic_',topic)) %>%
              group_by(topic) %>%
              summarise(theta = mean(theta)),
            by = 'topic') %>%
  mutate(diffTop = Yellen - `Other Fed Chairs`) %>%
  select(terms,diffInt,diffTop,theta,topic) %>%
  distinct()

# Show the ten top-ranked words of every topic, transposed so that each topic
# is a row. The value is not assigned; it is only displayed when this
# statement is evaluated at top level.
t(
  as_tibble(
    mutate(
      rename_all(
        data.frame(lda_model$get_top_words(n = 10)),
        function(nm) gsub('X','topic_',nm)
      ),
      top_word = row_number()
    )
  )
)

# Topic numbers excluded from the "substantive" set (presumably hand-coded as
# non-substantive after inspecting the top words -- TODO confirm).
insubs <- c( 1,  3,  5,  9, 10, 11, 12, 19, 20, 22,
            24, 27, 29, 31, 37, 39, 41, 42, 46, 47,
            49, 51, 54, 55, 57, 69, 71, 76, 83, 85,
            88, 90, 91, 92, 93, 96, 97)

# Column-style names ('topic_<n>') of the topics in 1..100 not listed above.
substantive <- paste0('topic_',which(!(1:100 %in% insubs)))

# Figure 9: difference in mean topic weight between interrupted and
# non-interrupted utterances (y) against the Yellen-minus-other-chairs
# difference (x), with point size and smoother weights given by `theta`.
# The plot object is built before the PDF device is opened so that an error
# while constructing it does not leave a device open.
fig9 <- toplot4 %>%
  # NOTE(review): `terms` is built as '<topic number>: <words>', so this
  # exact comparison presumably never removes a row. Left unchanged to keep
  # the figure identical -- confirm the intended exclusion.
  filter(terms != 'issue, get, type') %>%
  ggplot(aes(x = diffTop,y = diffInt,label = terms,size = theta,weight= theta)) + 
  geom_point(shape = 21) + 
  geom_smooth(show.legend = FALSE) + 
  geom_vline(xintercept = 0,linetype = 'dashed') + 
  geom_hline(yintercept = 0,linetype = 'dashed') +  
  ylab(bquote('' %<-% ' Topic Interrupted Less ... Topic Interrupted More ' %->% '')) + 
  xlab(bquote('' %<-% ' Topic Used More by Fed Chairs ... Topic Used More by Yellen ' %->% '')) + 
  theme_ridges() + 
  scale_size_continuous(name = 'Prevalence') + 
  theme(axis.title.x = element_text(hjust = .5,vjust = 0),
        axis.title.y = element_text(hjust = .5,vjust = 0),
        legend.position = 'bottom') + 
  # Label only topics in the `substantive` set or with a large interruption gap.
  geom_text_repel(data = toplot4 %>%
                    filter(topic %in% substantive | abs(diffInt) > .01),
                  size = 3)

pdf('./output/figures/MS_figure_9.pdf',width = 7,height = 7)
# Explicit print(): a ggplot is only drawn automatically when it is the value
# of a top-level expression, so under source() the PDF would otherwise be
# written without the plot.
print(fig9)
dev.off()

# EOF